library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(rjson)
library(jpeg)
library(magick)
## Linking to ImageMagick 6.9.12.3
## Enabled features: cairo, freetype, fftw, ghostscript, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fontconfig, x11
library(purrr)
library(tibble)
library(tidyr)
library(dplyr)
library(ggplot2)
library(stringr)
library(keras)
use_condaenv('r-reticulate')
library(tensorflow)
physical_devices <- tf$config$list_physical_devices('GPU')
tf$config$experimental$set_memory_growth(device = physical_devices[[1]], enable = T)

Data Wrangling

# # Read images
# df <- read_delim("CUB_200_2011/images.txt",
#                  col_names = c("Image_ID","File_Path"))
# # Fix paths
# df <- df %>% mutate(filepath = file.path(getwd(),"CUB_200_2011/images",File_Path),
#                     filename = basename(File_Path))
# # One time capture image original dimensions
# # H x W
# df$image_height <- NA
# df$image_width <- NA
# for(i in 1:nrow(df)){
#   img <- readJPEG(df[[i,"filepath"]])
#   df$image_height[i] <- dim(img)[1]
#   df$image_width[i] <- dim(img)[2]
# }
# 
# image_sizes <- df %>% select(Image_ID,image_height,image_width)
# write_delim(image_sizes, file = "CUB_200_2011/image_sizes.txt", col_names = F)
# Add training split
# df <- df %>% left_join(read_delim("CUB_200_2011/train_test_split.txt",
#                                   col_names = c("Image_ID", "Training_Image")),
#                        by = "Image_ID")
# # Add class ID
# df <- df %>% left_join(read_delim("CUB_200_2011/image_class_labels.txt",
#                                   col_names = c("Image_ID", "Class_ID")),
#                        by = "Image_ID")
# # Add class label
# df <- df %>% left_join(read_delim("CUB_200_2011/classes.txt",
#                                   col_names = c("Class_ID", "Class_Label")),
#                        by = "Class_ID")
# # Subtract 1 from labels
# df <- df %>% mutate(Class_ID = as.integer(Class_ID) - 1)
# 
# # Add bounding boxes
# boxes <- read_delim("CUB_200_2011/bounding_boxes.txt",
#                     col_names = c("Image_ID", "x_left","y_top","bbox_width","bbox_height"))
# boxes <- boxes %>%
#   mutate(y_bottom = y_top + bbox_height - 1, x_right = x_left + bbox_width - 1)
# 
# df <- df %>%
#   inner_join(boxes, by = "Image_ID")
# 
# # Add image dimensions
# 
# df <- df %>% left_join(read_delim("CUB_200_2011/image_sizes.txt",
#                                   col_names = c("Image_ID", "image_height", "image_width")),
#                        by = "Image_ID")
# 
# # Easier name
# df <- df %>%
#   mutate(name = sub(".*\\.","",Class_Label))
# # Save df for quick access
# write_delim(df, "CUB_200_2011/Complete_df.tsv")

Load data frame

df <- read_delim("CUB_200_2011/Complete_df.tsv")
## Rows: 11788 Columns: 16
## -- Column specification --------------------------------------------------------
## Delimiter: " "
## chr  (5): File_Path, filepath, filename, Class_Label, name
## dbl (11): Image_ID, Training_Image, Class_ID, x_left, y_top, bbox_width, bbo...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
target_height <- 224
target_width <- 224

df <- df %>% mutate(
  x_left_scaled = (x_left / image_width * target_width) %>% round(),
  x_right_scaled = (x_right / image_width * target_width) %>% round(),
  y_top_scaled = (y_top / image_height * target_height) %>% round(),
  y_bottom_scaled = (y_bottom / image_height * target_height) %>% round(),
  bbox_width_scaled =  (bbox_width / image_width * target_width) %>% round(),
  bbox_height_scaled = (bbox_height / image_height * target_height) %>% round()
)
img_data <- df[4,]
img <- image_read(img_data$filepath)
img <- image_draw(img)
rect(
  img_data$x_left,
  img_data$y_bottom,
  img_data$x_right,
  img_data$y_top,
  border = "purple",
  lwd = 2
)
text(
  img_data$x_right,
  img_data$y_top,
  img_data$name,
  offset = 1,
  pos = 2,
  cex = 1.5,
  col = "purple"
)
dev.off()
## png 
##   2

Single object categorization

feature_extractor <-
  application_xception(
    include_top = FALSE,
    input_shape = c(224, 224, 3),
    pooling = "avg"
)

feature_extractor %>% freeze_weights()
model <- keras_model_sequential() %>%
  feature_extractor %>%
  layer_batch_normalization() %>%
  layer_dropout(rate = 0.25) %>%
  layer_dense(units = 512, activation = "relu") %>%
  layer_batch_normalization() %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 200, activation = "softmax")

model %>% compile(
  optimizer = "adam",
  loss = "sparse_categorical_crossentropy",
  metrics = list("accuracy")
)
summary(model)
## Model: "sequential"
## ________________________________________________________________________________
##  Layer (type)                       Output Shape                    Param #     
## ================================================================================
##  xception (Functional)              (None, 2048)                    20861480    
##                                                                                 
##  batch_normalization_5 (BatchNormal  (None, 2048)                   8192        
##  ization)                                                                       
##                                                                                 
##  dropout_1 (Dropout)                (None, 2048)                    0           
##                                                                                 
##  dense_1 (Dense)                    (None, 512)                     1049088     
##                                                                                 
##  batch_normalization_4 (BatchNormal  (None, 512)                    2048        
##  ization)                                                                       
##                                                                                 
##  dropout (Dropout)                  (None, 512)                     0           
##                                                                                 
##  dense (Dense)                      (None, 200)                     102600      
##                                                                                 
## ================================================================================
## Total params: 22,023,408
## Trainable params: 1,156,808
## Non-trainable params: 20,866,600
## ________________________________________________________________________________
batch_size <- 10

load_and_preprocess_image <- function(image_name, target_height, target_width) {
  img_array <- image_load(
    file.path(image_name),
    target_size = c(target_height, target_width)
    ) %>%
    image_to_array() %>%
    xception_preprocess_input() 
  dim(img_array) <- c(1, dim(img_array))
  img_array
}

classification_generator <-
  function(data,
           target_height,
           target_width,
           shuffle,
           batch_size) {
    i <- 1
    function() {
      if (shuffle) {
        indices <- sample(1:nrow(data), size = batch_size)
      } else {
        if (i + batch_size >= nrow(data))
          i <<- 1
        indices <- c(i:min(i + batch_size - 1, nrow(data)))
        i <<- i + length(indices)
      }
      x <-
        array(0, dim = c(length(indices), target_height, target_width, 3))
      y <- array(0, dim = c(length(indices)))
      
      for (j in 1:length(indices)) {
        x[j, , , ] <-
          load_and_preprocess_image(data[[indices[j], "filepath"]],
                                    target_height, target_width)
        y[j] <-
          data[[indices[j], "Class_ID"]]
      }
      list(x, y)
    }
  }

train_gen <- classification_generator(
  df %>% filter(Training_Image == T),
  target_height = target_height,
  target_width = target_width,
  shuffle = TRUE,
  batch_size = batch_size
)

valid_gen <- classification_generator(
  df %>% filter(Training_Image == F),
  target_height = target_height,
  target_width = target_width,
  shuffle = TRUE,
  batch_size = batch_size
)
history <- model %>% fit(
  train_gen,
  epochs = 20,
  steps_per_epoch = nrow(df %>% filter(Training_Image == T)) / batch_size,
  validation_data = valid_gen,
  validation_steps = nrow(df %>% filter(Training_Image == F)) / batch_size,
  callbacks = list(
    callback_model_checkpoint(
      file.path("class_only", "weights.{epoch:02d}-{val_loss:.2f}.hdf5")
    ),
    callback_early_stopping(patience = 2)
  )
)
plot(history)
## `geom_smooth()` using formula 'y ~ x'

Bounding boxes

feature_extractor <- application_xception(
  include_top = FALSE,
  input_shape = c(224, 224, 3)
)

feature_extractor %>% freeze_weights()
model <- keras_model_sequential() %>%
  feature_extractor %>%
  layer_flatten() %>%
  layer_batch_normalization() %>%
  layer_dropout(rate = 0.25) %>%
  layer_dense(units = 512, activation = "relu") %>%
  layer_batch_normalization() %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 4)
metric_iou <- function(y_true, y_pred) {
  
  # order is [x_left, y_top, x_right, y_bottom]
  intersection_xmin <- k_maximum(y_true[ ,1], y_pred[ ,1])
  intersection_ymin <- k_maximum(y_true[ ,2], y_pred[ ,2])
  intersection_xmax <- k_minimum(y_true[ ,3], y_pred[ ,3])
  intersection_ymax <- k_minimum(y_true[ ,4], y_pred[ ,4])
  
  area_intersection <- (intersection_xmax - intersection_xmin) * 
                       (intersection_ymax - intersection_ymin)
  area_y <- (y_true[ ,3] - y_true[ ,1]) * (y_true[ ,4] - y_true[ ,2])
  area_yhat <- (y_pred[ ,3] - y_pred[ ,1]) * (y_pred[ ,4] - y_pred[ ,2])
  area_union <- area_y + area_yhat - area_intersection
  
  iou <- area_intersection/area_union
  k_mean(iou)
  
}
model %>% compile(
  optimizer = "adam",
  loss = "mae",
  metrics = list(custom_metric("iou", metric_iou))
)
localization_generator <-
  function(data,
           target_height,
           target_width,
           shuffle,
           batch_size) {
    i <- 1
    function() {
      if (shuffle) {
        indices <- sample(1:nrow(data), size = batch_size)
      } else {
        if (i + batch_size >= nrow(data))
          i <<- 1
        indices <- c(i:min(i + batch_size - 1, nrow(data)))
        i <<- i + length(indices)
      }
      x <-
        array(0, dim = c(length(indices), target_height, target_width, 3))
      y <- array(0, dim = c(length(indices), 4))
      
      for (j in 1:length(indices)) {
        x[j, , , ] <-
          load_and_preprocess_image(data[[indices[j], "filepath"]], 
                                    target_height, target_width)
        y[j, ] <-
          data[indices[j], c("x_left_scaled",
                             "y_top_scaled",
                             "x_right_scaled",
                             "y_bottom_scaled")] %>% as.matrix()
      }
      list(x, y)
    }
  }

train_gen <- localization_generator(
  df %>% filter(Training_Image == T),
  target_height = target_height,
  target_width = target_width,
  shuffle = TRUE,
  batch_size = batch_size
)

valid_gen <- localization_generator(
  df %>% filter(Training_Image == F),
  target_height = target_height,
  target_width = target_width,
  shuffle = FALSE,
  batch_size = batch_size
)
history <- model %>% fit(
  train_gen,
  epochs = 20,
  steps_per_epoch = nrow(df %>% filter(Training_Image == T)) / batch_size,
  validation_data = valid_gen,
  validation_steps = nrow(df %>% filter(Training_Image == F)) / batch_size,
  callbacks = list(
    callback_model_checkpoint(
      file.path("loc_only", "weights.{epoch:02d}-{val_loss:.2f}.hdf5")
    ),
    callback_early_stopping(patience = 2)
  )
)
plot(history)
## `geom_smooth()` using formula 'y ~ x'

plot_image_with_boxes <- function(file_name,
                                  object_class,
                                  box,
                                  scaled = FALSE,
                                  class_pred = NULL,
                                  box_pred = NULL) {
  img <- image_read(file_name)
  if(scaled) img <- image_resize(img, geometry = "224x224!")
  img <- image_draw(img)
  x_left <- box[1]
  y_bottom <- box[2]
  x_right <- box[3]
  y_top <- box[4]
  rect(
    x_left,
    y_bottom,
    x_right,
    y_top,
    border = "cyan",
    lwd = 2.5
  )
  if (!is.null(box_pred)){
    rect(box_pred[1],
         box_pred[2],
         box_pred[3],
         box_pred[4],
         border = "yellow",
         lwd = 2.5)
  }
  if (!is.null(class_pred)){
    text(
      box_pred[1],
      box_pred[2],
      class_pred,
      offset = 0,
      pos = 4,
      cex = 1.5,
      col = "yellow")
  }
  dev.off()
  img %>% image_write(paste0("CUB_200_2011/preds_", object_class, ".jpg"))
  plot(img)
}
train_1_8 <- head(df %>% filter(Training_Image == T), 8) %>%
  select(filepath,
         name,
         x_left_scaled, y_top_scaled, x_right_scaled, y_bottom_scaled)

for (i in 1:8) {
  preds <-
    model %>% predict(
      load_and_preprocess_image(train_1_8[i, "filepath"], 
                                target_height, target_width),
      batch_size = 1
  )
  plot_image_with_boxes(train_1_8$filepath[i],
                        paste0("train_", i),
                        train_1_8[i, 3:6] %>% as.matrix(),
                        scaled = TRUE,
                        box_pred = preds)
}

validation_1_8 <- head(df %>% filter(Training_Image == F), 8) %>%
  select(filepath,
         name,
         x_left_scaled, y_top_scaled, x_right_scaled, y_bottom_scaled)

for (i in 1:8) {
  preds <-
    model %>% predict(
      load_and_preprocess_image(validation_1_8[i, "filepath"], 
                                target_height, target_width),
      batch_size = 1
  )
  plot_image_with_boxes(validation_1_8$filepath[i],
                        paste0("validation_",i),
                        validation_1_8[i, 3:6] %>% as.matrix(),
                        scaled = TRUE,
                        box_pred = preds)
}

feature_extractor <- application_xception(
  include_top = FALSE,
  input_shape = c(224, 224, 3)
)

input <- feature_extractor$input
common <- feature_extractor$output %>%
  layer_flatten(name = "flatten") %>%
  layer_activation_relu() %>%
  layer_dropout(rate = 0.25) %>%
  layer_dense(units = 512, activation = "relu") %>%
  layer_batch_normalization() %>%
  layer_dropout(rate = 0.5)

regression_output <-
  layer_dense(common, units = 4, name = "regression_output")
class_output <- layer_dense(
  common,
  units = 200,
  activation = "softmax",
  name = "class_output"
)

model <- keras_model(
  inputs = input,
  outputs = list(regression_output, class_output)
)
model %>% freeze_weights(to = "flatten")

model %>% compile(
  optimizer = "adam",
  loss = list("mae", "sparse_categorical_crossentropy"),
  #loss_weights = list(
  #  regression_output = 0.05,
  #  class_output = 0.95),
  metrics = list(
    regression_output = custom_metric("iou", metric_iou),
    class_output = "accuracy"
  )
)
loc_class_generator <-
  function(data,
           target_height,
           target_width,
           shuffle,
           batch_size) {
    i <- 1
    function() {
      if (shuffle) {
        indices <- sample(1:nrow(data), size = batch_size)
      } else {
        if (i + batch_size >= nrow(data))
          i <<- 1
        indices <- c(i:min(i + batch_size - 1, nrow(data)))
        i <<- i + length(indices)
      }
      x <-
        array(0, dim = c(length(indices), target_height, target_width, 3))
      y1 <- array(0, dim = c(length(indices), 4))
      y2 <- array(0, dim = c(length(indices)))
      
      for (j in 1:length(indices)) {
        x[j, , , ] <-
          load_and_preprocess_image(data[[indices[j], "filepath"]], 
                                    target_height, target_width)
        y1[j, ] <-
          data[indices[j], c("x_left_scaled",
                             "y_top_scaled",
                             "x_right_scaled",
                             "y_bottom_scaled")] %>% as.matrix()
        y2[j] <-
          data[[indices[j], "Class_ID"]]
      }
      list(x, list(y1, y2))
    }
  }

train_gen <- loc_class_generator(
  df %>% filter(Training_Image == T),
  target_height = target_height,
  target_width = target_width,
  shuffle = TRUE,
  batch_size = batch_size
)

valid_gen <- loc_class_generator(
  df %>% filter(Training_Image == F),
  target_height = target_height,
  target_width = target_width,
  shuffle = FALSE,
  batch_size = batch_size
)
batch_size <- 10
history <- model %>% fit(
  train_gen,
  epochs = 20,
  steps_per_epoch = nrow(df %>% filter(Training_Image == T)) / batch_size,
  validation_data = valid_gen,
  validation_steps = nrow(df %>% filter(Training_Image == F)) / batch_size,
  callbacks = list(
    callback_model_checkpoint(
      file.path("loc_class", "weights.{epoch:02d}-{val_loss:.2f}.hdf5")
    ),
    callback_early_stopping(patience = 2)
  )
)
plot(history)
## `geom_smooth()` using formula 'y ~ x'

plot_image_with_boxes <- function(file_name,
                                  object_class,
                                  name,
                                  box,
                                  scaled = FALSE,
                                  class_pred = NULL,
                                  box_pred = NULL) {
  img <- image_read(file_name)
  if(scaled) img <- image_resize(img, geometry = "224x224!")
  img <- image_draw(img)
  x_left <- box[1]
  y_bottom <- box[2]
  x_right <- box[3]
  y_top <- box[4]
  rect(
    x_left,
    y_bottom,
    x_right,
    y_top,
    border = "cyan",
    lwd = 2.5
  )
  text(
    x_left,
    y_top,
    object_class,
    offset = 1,
    pos = 2,
    cex = 1.5,
    col = "cyan"
  )  
  if (!is.null(box_pred)){
    rect(box_pred[1],
         box_pred[2],
         box_pred[3],
         box_pred[4],
         border = "yellow",
         lwd = 2.5)
  }
  if (!is.null(class_pred)){
    text(
      box_pred[1],
      box_pred[2],
      class_pred,
      offset = 0,
      pos = 4,
      cex = 1.5,
      col = "yellow")
  }
  dev.off()
  img %>% image_write(paste0("CUB_200_2011/preds_", name, ".jpg"))
  plot(img)
}
train_1_8 <- df %>% filter(Training_Image == T) %>%
  select(filepath,
         name,
         x_left_scaled, y_top_scaled, x_right_scaled, y_bottom_scaled)

for (i in sample(1:nrow(train_1_8),8)) {
  preds <-
    model %>% predict(
      load_and_preprocess_image(train_1_8[i, "filepath"], 
                                target_height, target_width),
      batch_size = 1
  )
  class_id <- df %>% filter(Class_ID == (which(unlist(preds[2]) == max(unlist(preds[2])))-1)) %>% select(name) %>% head(1) %>% pull()
  plot_image_with_boxes(train_1_8$filepath[i],
                        class_id,
                        paste0("train_", i),
                        train_1_8[i, 3:6] %>% as.matrix(),
                        scaled = TRUE,
                        class_pred = class_id,
                        box_pred = unlist(preds[1])
  )
}

Check preds to truth

test_images <- df %>% filter(Training_Image == F) %>%
  select(filepath,
         name,
         Class_ID,
         x_left_scaled,
         y_top_scaled,
         x_right_scaled,
         y_bottom_scaled)

test_images$prediction <- NA

for (i in 1:nrow(test_images)) {
  preds <-
    model %>% predict(
      load_and_preprocess_image(test_images[i, "filepath"], 
                                target_height, target_width),
      batch_size = 1
  )
  pred_class_id <- which(unlist(preds[2]) == max(unlist(preds[2])))
  test_images$prediction[i] <- pred_class_id - 1
}

sum(test_images$Class_ID == test_images$prediction)/nrow(test_images)
## [1] 0.4508112